1/14/2019

Outline

  • R package igraph
    • Get network from files (edgelist, matrix, dataframe)
    • Visualization
      • Plotting parameters
      • Layouts
    • Network and node descriptions

Dataset

igraph

Preparation

#install.packages("igraph")
#install.packages("igraphdata")
library(igraph)
library(igraphdata)

#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("stringr")

1. Get network from files

Creating network

1. Get network from files

  • graph_from_adjacency_matrix()
  • graph_from_edgelist()
  • graph_from_data_frame()

1.1 graph_from_adjacency_matrix()

Used for creating a graph from a small adjacency matrix.

Real-world networks are usually large and sparse, so they are typically stored as an edgelist rather than a full matrix.

Binary matrix:

# Reproducible random 10 x 10 binary adjacency matrix:
# 100 independent 0/1 draws, with a 1 (an edge) having probability 0.1.
set.seed(2)
adjm <- matrix(
  sample(0:1, size = 100, replace = TRUE, prob = c(0.9, 0.1)),
  ncol = 10
)
adjm
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    0    0    1    0    0    0    0     1
##  [2,]    0    0    0    0    0    0    0    0    0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0
##  [4,]    0    0    0    0    0    1    0    0    0     0
##  [5,]    1    0    0    0    1    0    0    0    0     0
##  [6,]    1    0    0    0    0    0    0    0    0     0
##  [7,]    0    1    0    0    1    0    0    0    1     0
##  [8,]    0    0    0    0    0    1    0    0    0     0
##  [9,]    0    0    1    0    0    0    0    0    0     0
## [10,]    0    0    0    0    0    0    0    0    0     0
# Directed graph: this is the default when no mode is given.
g1 <- graph_from_adjacency_matrix(adjm)
set.seed(1)  # fix the random layout so the picture is reproducible
plot(g1)

# Same matrix, read as an undirected graph.
g2 <- graph_from_adjacency_matrix(adjm, mode = "undirected")
set.seed(1)
plot(g2)

# diag = FALSE drops the self-loops, which often make no sense
# in real-world networks.
g3 <- graph_from_adjacency_matrix(adjm, mode = "undirected", diag = FALSE)
set.seed(1)
plot(g3)

Sparse matrix:

# Indexing a graph with empty brackets returns its adjacency matrix
# (printed below as a sparse matrix).
adjms <- g1[]
adjms
## 10 x 10 sparse Matrix of class "dgCMatrix"
##                          
##  [1,] . . . . 1 . . . . 1
##  [2,] . . . . . . . . . .
##  [3,] . . . . . . . . . .
##  [4,] . . . . . 1 . . . .
##  [5,] 1 . . . 1 . . . . .
##  [6,] 1 . . . . . . . . .
##  [7,] . 1 . . 1 . . . 1 .
##  [8,] . . . . . 1 . . . .
##  [9,] . . 1 . . . . . . .
## [10,] . . . . . . . . . .
# A sparse adjacency matrix is accepted in the same way as a dense one.
g4 <- graph_from_adjacency_matrix(adjms)
set.seed(1)
plot(g4)

Weighted matrix

# Reproducible random 10 x 10 weighted matrix: entries are 0 with
# probability 0.9, otherwise one of the weights 1..5 (0.02 each).
set.seed(1)
adjmw <- matrix(
  sample(0:5, size = 100, replace = TRUE,
         prob = c(0.9, 0.02, 0.02, 0.02, 0.02, 0.02)),
  ncol = 10
)
adjmw
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
##  [1,]    0    0    3    0    0    0    2    0    0     0
##  [2,]    0    0    0    0    0    0    0    0    0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0
##  [4,]    2    0    0    0    0    0    0    0    0     0
##  [5,]    0    0    0    0    0    0    0    0    0     0
##  [6,]    0    0    0    0    0    0    0    0    0     0
##  [7,]    4    0    0    0    0    0    0    0    0     0
##  [8,]    0    1    0    0    0    0    0    0    0     0
##  [9,]    0    0    0    0    0    0    0    0    0     0
## [10,]    0    0    0    0    0    0    0    5    0     0
# weighted = TRUE stores the matrix entries in the edge attribute `weight`.
g5 <- graph_from_adjacency_matrix(adjmw, weighted = TRUE)
set.seed(1)
plot(g5)

# Print the graph summary (attributes and edge list).
g5
## IGRAPH 1d47339 D-W- 10 6 -- 
## + attr: weight (e/n)
## + edges from 1d47339:
## [1]  1->3  1->7  4->1  7->1  8->2 10->8
E(g5)$weight
## [1] 3 2 2 4 1 5

Named matrix

# Label rows and columns A..J; the plot below then shows these
# letters as vertex labels.
dimnames(adjmw) <- list(LETTERS[1:10], LETTERS[1:10])
g6 <- graph_from_adjacency_matrix(adjmw, weighted = TRUE)
set.seed(1)
plot(g6)

1.2 graph_from_edgelist()

Most network datasets are stored as edgelists. Input is two-column matrix with each row defining one edge.

# Load the Stark/Lannister edge table; keep text columns as character.
gotdf <- read.csv(file = "gotstark_lannister.csv", stringsAsFactors = FALSE)
head(gotdf, n = 5)
##   X     Source           Target       Type weight book source.family
## 1 1 Arya-Stark     Benjen-Stark Undirected      3    1         Stark
## 2 2 Arya-Stark       Bran-Stark Undirected     14    1         Stark
## 3 3 Arya-Stark    Catelyn-Stark Undirected      5    1         Stark
## 4 4 Arya-Stark Cersei-Lannister Undirected     12    1         Stark
## 5 5 Arya-Stark          Desmond Undirected      3    1         Stark
##   target.family
## 1         Stark
## 2         Stark
## 3         Stark
## 4     Lannister
## 5          <NA>
library(dplyr)
library(tidyr)
# Turn the weighted edge table into a plain two-column edgelist in which
# every (Source, Target) pair is repeated `weight` times.
gotdf.el <- gotdf %>%
  select(Source, Target, weight) %>%
  group_by(Source, Target) %>%
  # seq_len() instead of 1:weight: a weight of 0 then contributes no rows,
  # whereas 1:0 evaluates to c(1, 0) and would create two spurious edges.
  expand(edge = seq_len(weight)) %>%
  # `edge` was only a counter used for the expansion.
  select(-edge)
head(gotdf.el)
## # A tibble: 6 x 2
## # Groups: Source, Target [2]
##   Source     Target      
##   <chr>      <chr>       
## 1 Arya-Stark Benjen-Stark
## 2 Arya-Stark Benjen-Stark
## 3 Arya-Stark Benjen-Stark
## 4 Arya-Stark Bran-Stark  
## 5 Arya-Stark Bran-Stark  
## 6 Arya-Stark Bran-Stark

# graph_from_edgelist() needs a matrix, so convert the tibble first.
got1 <- graph_from_edgelist(as.matrix(gotdf.el), directed = FALSE)
got1
## IGRAPH 011604f UN-- 99 3374 -- 
## + attr: name (v/c)
## + edges from 011604f (vertex names):
##  [1] Arya-Stark--Benjen-Stark  Arya-Stark--Benjen-Stark 
##  [3] Arya-Stark--Benjen-Stark  Arya-Stark--Bran-Stark   
##  [5] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
##  [7] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
##  [9] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [11] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [13] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## [15] Arya-Stark--Bran-Stark    Arya-Stark--Bran-Stark   
## + ... omitted several edges
# Small gold vertices with tiny offset labels keep the dense
# multigraph readable.
plot(
  got1,
  edge.arrow.size = .5,
  edge.curved = 0.2,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)

Simplify the network

# Toy example: the edge foo -> bar appears twice, i.e. a multi-edge.
el <- matrix(
  c("foo", "bar",
    "foo", "bar",
    "bar", "foobar"),
  ncol = 2, byrow = TRUE
)
plot(graph_from_edgelist(el))

# Give every parallel edge weight 1, then merge duplicates and sum the
# weights, so that the merged edge's weight is the number of copies.
E(got1)$weight <- rep(1, ecount(got1))
got1s <- igraph::simplify(
  got1,
  remove.multiple = TRUE,
  remove.loops = FALSE,
  edge.attr.comb = c(weight = "sum")
)
plot(
  got1s,
  layout = layout_with_lgl,
  edge.arrow.size = .5,
  edge.curved = 0.5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)

Short name

library(stringr)
# Keep only the part before the first "-" (the first name) of every
# vertex name of got1s.
nameshort <- str_split(V(got1s)$name, "-", simplify = TRUE)[, 1]
V(got1s)$name[1:3]
## [1] "Arya-Stark"   "Benjen-Stark" "Bran-Stark"
nameshort[1:3]
## [1] "Arya"   "Benjen" "Bran"
# Relabel the vertices of got1s with the short names and redraw.
V(got1s)$name <- nameshort
plot(
  got1s,
  layout = layout_with_lgl,
  edge.arrow.size = .5,
  edge.curved = 0.5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)

1.3 graph_from_data_frame()

Most common and useful.

d: a data frame containing a symbolic edge list in the first two columns. Additional columns are considered as edge attributes.

vertices: A data frame with vertex metadata

head(gotdf,5)
##   X     Source           Target       Type weight book source.family
## 1 1 Arya-Stark     Benjen-Stark Undirected      3    1         Stark
## 2 2 Arya-Stark       Bran-Stark Undirected     14    1         Stark
## 3 3 Arya-Stark    Catelyn-Stark Undirected      5    1         Stark
## 4 4 Arya-Stark Cersei-Lannister Undirected     12    1         Stark
## 5 5 Arya-Stark          Desmond Undirected      3    1         Stark
##   target.family
## 1         Stark
## 2         Stark
## 3         Stark
## 4     Lannister
## 5          <NA>
# Drop the row-index column X so that Source/Target are the first two
# columns; the remaining columns become edge attributes.
gotdf <- select(gotdf, -X)
got2 <- graph_from_data_frame(d = gotdf, directed = FALSE)
got2
## IGRAPH 436d7b1 UNW- 99 238 -- 
## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n),
## | source.family (e/c), target.family (e/c)
## + edges from 436d7b1 (vertex names):
##  [1] Arya-Stark--Benjen-Stark       Arya-Stark--Bran-Stark        
##  [3] Arya-Stark--Catelyn-Stark      Arya-Stark--Cersei-Lannister  
##  [5] Arya-Stark--Desmond            Arya-Stark--Eddard-Stark      
##  [7] Arya-Stark--Ilyn-Payne         Arya-Stark--Jeyne-Poole       
##  [9] Arya-Stark--Joffrey-Baratheon  Arya-Stark--Jon-Snow          
## [11] Arya-Stark--Jory-Cassel        Arya-Stark--Meryn-Trant       
## [13] Arya-Stark--Mordane            Arya-Stark--Mycah             
## + ... omitted several edges
# Same visual settings as for got1s, now applied to the data-frame graph.
plot(
  got2,
  layout = layout_with_lgl,
  edge.arrow.size = .5,
  edge.curved = 0.5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)

Get a data frame, adjacency matrix or edgelist from an igraph object

# Edge data frame (namespace-qualified because dplyr is attached and
# also exports an as_data_frame()).
head(igraph::as_data_frame(got2), 2)
##         from           to       Type weight book source.family
## 1 Arya-Stark Benjen-Stark Undirected      3    1         Stark
## 2 Arya-Stark   Bran-Stark Undirected     14    1         Stark
##   target.family
## 1         Stark
## 2         Stark
# Adjacency matrix.
head(as_adjacency_matrix(got2), 2)
## [1] 0 1
# Two-column edgelist matrix.
head(as_edgelist(got2), 2)
##      [,1]         [,2]          
## [1,] "Arya-Stark" "Benjen-Stark"
## [2,] "Arya-Stark" "Bran-Stark"

read_graph, write_graph

## Plain-text edgelist round trip. The graph read back (printed below)
## shows numeric vertex ids and no attributes.
write_graph(got2, file = "g.txt", format = "edgelist")
read_graph("g.txt", format = "edgelist", directed = FALSE)
## IGRAPH 6963953 U--- 99 238 -- 
## + edges from 6963953:
##   [1] 1-- 2 1-- 3 1-- 5 1-- 6 1-- 7 1--12 1--13 1--14 1--17 1--18 1--19
##  [12] 1--20 1--21 1--22 1--23 1--24 1--25 1--26 1--27 1--28 1--29 1--30
##  [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--13 2--15 2--21 2--28
##  [34] 2--35 2--36 2--37 2--38 2--39 2--40 2--41 3-- 5 3-- 6 3-- 7 3--12
##  [45] 3--13 3--14 3--15 3--20 3--21 3--22 3--27 3--28 3--29 3--33 3--35
##  [56] 3--37 3--38 3--40 3--42 3--43 3--44 3--45 3--46 3--47 3--48 3--49
##  [67] 3--50 3--51 3--52 3--53 4-- 7 4--11 4--27 4--28 4--52 5-- 6 5-- 7
##  [78] 5-- 8 5--12 5--13 5--14 5--15 5--16 5--20 5--21 5--27 5--28 5--29
##  [89] 5--38 5--40 5--43 5--46 5--51 5--54 5--55 5--56 5--57 5--58 5--59
## + ... omitted several edges
## Pajek round trip. The graph read back (printed below) still has the
## edge weights.
write_graph(got2, file = "gg", format = "pajek")
read_graph("gg", format = "pajek")
## IGRAPH fb32cbf U-W- 99 238 -- 
## + attr: weight (e/n)
## + edges from fb32cbf:
##  [1] 1-- 2 1-- 3 1-- 5 1-- 6 1--17 1-- 7 1--18 1--19 1--20 1--21 1--22
## [12] 1--23 1--24 1--25 1--26 1--27 1--12 1--13 1--28 1--29 1--30 1--14
## [23] 1--31 1--32 1--33 1--34 1--35 2-- 3 2-- 6 2--36 2--37 2--21 2--38
## [34] 2--39 2--13 2--28 2--40 2--15 2--41 2--35 3-- 5 3-- 6 3-- 7 3--42
## [45] 3--43 3--44 3--45 3--37 3--20 3--46 3--21 3--22 3--47 3--38 3--48
## [56] 3--49 3--27 3--50 3--51 3--52 3--12 3--13 3--28 3--29 3--14 3--53
## [67] 3--40 3--33 3--15 3--35 4-- 7 4--11 4--27 4--52 4--28 5-- 6 5--54
## [78] 5--55 5-- 7 5--56 5--57 5--43 5--58 5-- 8 5--20 5--46 5--21 5--59
## + ... omitted several edges
got2
## IGRAPH 436d7b1 UNW- 99 238 -- 
## + attr: name (v/c), Type (e/c), weight (e/n), book (e/n),
## | source.family (e/c), target.family (e/c)
## + edges from 436d7b1 (vertex names):
##  [1] Arya-Stark--Benjen-Stark       Arya-Stark--Bran-Stark        
##  [3] Arya-Stark--Catelyn-Stark      Arya-Stark--Cersei-Lannister  
##  [5] Arya-Stark--Desmond            Arya-Stark--Eddard-Stark      
##  [7] Arya-Stark--Ilyn-Payne         Arya-Stark--Jeyne-Poole       
##  [9] Arya-Stark--Joffrey-Baratheon  Arya-Stark--Jon-Snow          
## [11] Arya-Stark--Jory-Cassel        Arya-Stark--Meryn-Trant       
## [13] Arya-Stark--Mordane            Arya-Stark--Mycah             
## + ... omitted several edges

2. Visualization

2. Visualization

  • Plotting parameters: mapping important attributes to visual properties
  • Find a good layout
?igraph.plotting

2.1 Plotting parameters

# Baseline plot: every vertex looks the same, no attribute is mapped yet.
plot(
  got2,
  layout = layout_with_lgl,
  edge.curved = 0.5,
  vertex.color = "gold",
  vertex.size = 3,
  vertex.frame.color = "gray",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)

To make the graph look nicer

  • Node color: using family name
  • Node size: degree
  • Edge width: weight
## store the fullname
fullnames <- V(got2)$name
fullnames[1:3]
## [1] "Arya-Stark"   "Benjen-Stark" "Bran-Stark"
# Split every "First-Family" name once: column 1 holds the first name,
# column 2 the family name ("" when the name has no "-").
nameparts <- str_split(fullnames, "-", simplify = TRUE)
# get family name; characters without a real family are grouped as "None"
familynames <- nameparts[, 2]
familynames[familynames == ""] <- "None"
familynames[familynames == "(guard)"] <- "None"
# First names are derived from got2's own vertex names. Reusing
# `nameshort` (computed from got1s) would mislabel vertices, because the
# two graphs store their vertices in different orders.
firstnames <- nameparts[, 1]
# add vertices attributes
V(got2)$familyname <- familynames
V(got2)$fullname <- fullnames
V(got2)$name <- firstnames # first name

Set colors and legend.

  • pch: plotting symbols appearing in the legend
  • pt.bg: background color for point
  • cex: text size
  • pt.cex: point size
  • ncol: number of columns of the legend
  • bty: "o"– rectangle box; "n" – no box
# Vertex color by family: Stark = tomato, Lannister = gold, rest = gray50.
vcol <- V(got2)$familyname
vcol <- ifelse(
  vcol == "Stark", "tomato",
  ifelse(vcol == "Lannister", "gold", "gray50")
)
V(got2)$color <- vcol
# Vertex size grows with log(degree), edge width with log(weight).
V(got2)$size <- log(degree(got2)) * 4
E(got2)$width <- log(E(got2)$weight) / 2
plot(
  got2,
  layout = layout_with_kk,
  edge.curved = 0.5,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 2
)
# Legend with filled circles (pch = 21) in the three family colors.
legend(
  "right",
  legend = c("Stark", "Lannister", "Other"),
  pch = 21,
  col = c("tomato", "gold", "gray50"),
  pt.bg = c("tomato", "gold", "gray50"),
  pt.cex = 1, cex = .8, bty = "n", ncol = 1
)

Plot only labels of the nodes

2.2 Layouts

Force-directed layouts: suitable for general, small to medium sized graphs. (computational complexity; based on physical analogies)

  • layout_with_fr: Fruchterman-Reingold is one of the most used force-directed layout algorithms. Force-directed layouts try to get a nice-looking graph where edges are similar in length and cross each other as little as possible. As a result, nodes are evenly distributed through the chart area, and the layout is intuitive in that nodes which share more connections are closer to each other.
  • layout_with_kk: Another popular force-directed algorithm that produces nice results for connected graphs is Kamada Kawai.
  • layout_with_graphopt: …

For large graphs:

  • layout_with_lgl: The LGL algorithm is meant for large, connected graphs. Here you can also specify a root: a node that will be placed in the middle of the layout.
  • layout_with_drl:
  • layout_with_gfr:

  • layout_with_dh:simulated annealing algorithm by Davidson and Harel
# Davidson-Harel layout, with the same family legend as before.
plot(
  got2,
  layout = layout_with_dh,
  edge.curved = 0.5,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)
legend(
  "right",
  legend = c("Stark", "Lannister", "Other"),
  pch = 21,
  col = c("tomato", "gold", "gray50"),
  pt.bg = c("tomato", "gold", "gray50"),
  pt.cex = 1, cex = .8, bty = "n", ncol = 1
)

Selecting a layout automatically

  • connected and vcount<=100: kk
  • vcount<=1000:fr
  • else: drl
# Let igraph choose the layout algorithm for this graph.
plot(
  got2,
  layout = layout.auto(got2),
  edge.curved = 0.5,
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)

Labels only (no vertex shapes), with colored edges.

# vertex.shape = "none" hides the vertex symbols, so only labels are drawn.
set.seed(2)
plot(
  got2,
  layout = layout_with_dh,
  edge.curved = 0.5,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)

## color the edge: first inspect which edge attributes are available
got2
## IGRAPH 436d7b1 UNW- 99 238 -- 
## + attr: name (v/c), familyname (v/c), fullname (v/c), color (v/c),
## | size (v/n), Type (e/c), weight (e/n), book (e/n), source.family
## | (e/c), target.family (e/c), width (e/n)
## + edges from 436d7b1 (vertex names):
##  [1] Arya--Benjen  Arya--Bran    Arya--Cersei  Arya--Desmond Arya--Petyr  
##  [6] Arya--Eddard  Arya--Rickon  Arya--Robb    Arya--Robert  Arya--Rodrik 
## [11] Arya--Sandor  Arya--Sansa   Arya--Syrio   Arya--Tomard  Arya--Tommen 
## [16] Arya--Vayon   Arya--Jory    Arya--Meryn   Arya--Yoren   Arya--Jaremy 
## [21] Arya--Jeor    Arya--Mordane Arya--Luwin   Arya--Mance   Arya--Theon  
## [26] Arya--Tyrion  Arya--Waymar 
## + ... omitted several edges
# Edge color by the families at its two ends:
#   source Stark -> tomato, source Lannister -> gold, otherwise gray50;
#   a Stark source with a Lannister target (or the reverse) -> orange.
# which() ignores NA comparisons, so edges with a missing family keep
# the color they already have.
ecol <- rep("gray50", ecount(got2))
ecol[which(E(got2)$source.family == "Stark")] <- "tomato"
ecol[which(E(got2)$source.family == "Lannister")] <- "gold"
ecol[which(ecol == "tomato" & E(got2)$target.family == "Lannister")] <- "orange"
ecol[which(ecol == "gold" & E(got2)$target.family == "Stark")] <- "orange"

set.seed(2)
plot(
  got2,
  layout = layout_with_dh,
  edge.color = ecol,
  edge.curved = 0.5,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)
# Line legend (lty = 1) for the four edge colors.
legend(
  "right",
  legend = c("Stark", "Lannister", "Stark-Lannister", "Other"),
  col = c("tomato", "gold", "orange", "gray50"),
  lty = rep(1, 4), cex = .8, bty = "n", ncol = 1
)

layout is not deterministic

Different runs will result in slightly different configurations. Saving the layout or set.seed allows us to get the exact same result multiple times, which can be helpful if you want to plot the time evolution of a graph, or different relationships – and want nodes to stay in the same place in multiple plots.

# Compute the coordinates once (seeded) and reuse the matrix `l`,
# so that repeated plots place the vertices identically.
set.seed(1)
l <- layout_with_dh(got2)
plot(
  got2,
  layout = l,
  edge.curved = 0.5,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)

rescale

  • norm_coords
  • rescale=F
  • can use layout=l*2
# Fruchterman-Reingold coordinates, normalized to [-1, 1] on both axes.
# rescale = FALSE makes plot() use these coordinates as given.
l <- norm_coords(
  layout_with_fr(got2),
  xmin = -1, xmax = 1, ymin = -1, ymax = 1
)
plot(
  got2,
  layout = l,
  rescale = FALSE,
  edge.curved = 0.5,
  vertex.shape = "none",
  vertex.label.color = "black",
  vertex.label.cex = .5,
  vertex.label.dist = 0.2
)

Will introduce interactive r packages next time.

# Draw the same layout at four zoom factors in a 2 x 2 grid without margins.
# par() returns the previous settings; they are restored afterwards so that
# later plots (e.g. the degree histogram) are not squeezed into this grid
# with zero margins, which would hide their axes and titles.
op <- par(mfrow = c(2, 2), mar = c(0, 0, 0, 0))
for (zoom in c(0.5, 0.8, 1, 2)) {
  plot(
    got2,
    layout = l * zoom,
    rescale = FALSE,  # keep the scaled coordinates as given
    edge.curved = 0.5,
    vertex.shape = "none",
    vertex.label.color = "black",
    vertex.label.cex = .5,
    vertex.label.dist = 0.2
  )
}
par(op)

#dev.off()

3. Network and node descriptions

  • Density: edge_density
  • Degree: degree
  • centrality and centralization:
    • centr_degree
    • closeness, centr_clo
    • eigen_centrality, centr_eigen
    • betweenness, edge_betweenness, centr_betw
  • reciprocity,transitivity,diameter,…

Density

The proportion of present edges from all possible ties.

# Built-in density, ignoring self-loops.
edge_density(got2, loops = FALSE)
## [1] 0.04906205
# Same value by hand for an undirected network: edges / (n * (n - 1) / 2).
2 * ecount(got2) / (vcount(got2) * (vcount(got2) - 1))
## [1] 0.04906205

Node degrees

'degree' has a mode of 'in' for in-degree, 'out' for out-degree, and 'all' or 'total' for total degree.

Notice the graph is undirected. So there is no difference under different parameter setting.

# Degree of every vertex, shown as a histogram with one bin per integer
# from 0 to vcount - 1.
deg <- degree(got2, mode = "all")
hist(
  deg,
  breaks = 0:(vcount(got2) - 1),
  main = "Histogram of node degree"
)

# Cumulative degree distribution, plotted against the degrees 0..max(deg).
deg.dist <- degree_distribution(got2, cumulative = TRUE, mode = "all")
plot(
  x = 0:max(deg), y = 1 - deg.dist,
  pch = 19, cex = 1.2, col = "orange",
  xlab = "Degree", ylab = "Cumulative Frequency"
)

centrality and centralization

Who is the most important character?

  • Degree
  • Closeness
  • Eigenvector
  • Betweenness

Degree (number of ties).

Normalization should be the max degree the network can get

# Five highest degrees.
sort(degree(got2, mode = "in", loops = FALSE), decreasing = TRUE)[1:5]
##  Eddard  Cersei    Bran    Arya Desmond 
##      56      41      32      27      27
# The network is undirected, so the choice of mode does not matter.
sort(
  centr_degree(got2, mode = "in", normalized = TRUE, loops = FALSE)$res,
  decreasing = TRUE
)[1:5]
## [1] 56 41 32 27 27
sort(
  centr_degree(got2, mode = "all", normalized = TRUE, loops = FALSE)$res,
  decreasing = TRUE
)[1:5]
## [1] 56 41 32 27 27
# Pay attention to whether self-loops are allowed:
# the theoretical maximum used for normalization depends on that setting.
centr_degree(got2, mode = "all", normalized = TRUE, loops = FALSE)$theoretical_max
## [1] 9506
centr_degree(got2, mode = "in", normalized = TRUE, loops = FALSE)$theoretical_max
## [1] 9506
centr_degree(got2, mode = "in", normalized = TRUE, loops = TRUE)$theoretical_max
## [1] 9702

Closeness (centrality based on distance to others in the graph) Inverse of the node's average geodesic distance to others in the network

# Whether to include edge weights or not:
# if the graph has an edge attribute `weight`, it is taken into account
# automatically; weights = NA switches that off.
sort(closeness(got2, mode = "all", weights = NA), decreasing = TRUE)[1:5]
##      Eddard      Cersei        Bran        Arya     Desmond 
## 0.006993007 0.006329114 0.006097561 0.005882353 0.005847953
sort(closeness(got2, mode = "all"), decreasing = TRUE)[1:5]
##       Eddard       Cersei       Donnel         Bran         Arya 
## 0.0010245902 0.0010141988 0.0010080645 0.0010030090 0.0009852217
sort(centr_clo(got2, mode = "all", normalized = TRUE)$res, decreasing = TRUE)[1:5]
## [1] 0.6853147 0.6202532 0.5975610 0.5764706 0.5730994

Eigenvector (centrality proportional to the sum of connection centralities) Values of the first eigenvector of the graph adjacency matrix

# Top five eigenvector centralities: first with weights switched off
# (weights = NA), then with the default weight handling.
sort(
  eigen_centrality(got2, directed = FALSE, weights = NA)$vector,
  decreasing = TRUE
)[1:5]
##    Eddard    Cersei      Bran   Desmond      Arya 
## 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883
sort(eigen_centrality(got2, directed = FALSE)$vector, decreasing = TRUE)[1:5]
##    Eddard     Yoren   Desmond    Cersei     Vayon 
## 1.0000000 0.8538947 0.4281666 0.3352669 0.2441671
sort(
  centr_eigen(got2, directed = FALSE, normalized = TRUE)$vector,
  decreasing = TRUE
)[1:5]
## [1] 1.0000000 0.8163499 0.7410532 0.7276696 0.6740883

Betweenness (centrality based on a broker position connecting others) (Number of geodesics that pass through the node or the edge)

# Top five vertex betweenness scores, without weights and then with the
# default weight handling.
sort(betweenness(got2, directed = FALSE, weights = NA), decreasing = TRUE)[1:5]
##    Eddard    Cersei      Bran      Arya     Meryn 
## 2155.2656 1554.1678  915.6561  510.5637  366.8074
sort(betweenness(got2, directed = FALSE), decreasing = TRUE)[1:5]
##    Eddard    Cersei      Bran    Benjen      Arya 
## 1835.5000 1483.2500 1024.8571  694.4762  689.5833
# Same idea for edges instead of vertices.
sort(
  edge_betweenness(got2, directed = FALSE, weights = NA),
  decreasing = TRUE
)[1:5]
## [1] 426.4643 271.6982 198.3379 150.0371 133.8635
sort(
  centr_betw(got2, directed = FALSE, normalized = TRUE)$res,
  decreasing = TRUE
)[1:5]
## [1] 2155.2656 1554.1678  915.6561  510.5637  366.8074

Other properties

  • transitivity
  • reciprocity
  • clustering coefficient

Exercise

Exercise

  • Download dataset to get network in book3 https://github.com/mathbeveridge/asoiaf.
  • Only keep the nodes with degree over 5.
  • Only keep the nodes connected to family "Stark", "Targaryen" and "Lannister".
  • Present the network nicely
  • List the top 5 most important characters.